# ------------------------------------------------------------------------------#
# Description: Create bootstrap nonlinear and number of adoption datasets
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(pacman)
p_load(data.table, fixest, modelsummary, ggplot2)
options(scipen = 999)


# Load adoption data -----------------------------------------------------------
load("data/adoptions/adoptions.RData")

# Merge peer adoption buckets --------------------------------------------------
load("data/bucket/peer_adopt_buckets_any.RData")
dat <- merge(adopt, peer.adopt.bucket.any, by = c("pin", "year"))
rm(adopt, peer.adopt.bucket.any)

load("data/bucket/peer_adopt_buckets_rg.RData")
dat <- merge(dat, peer.adopt.bucket.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.rg)

load("data/bucket/peer_adopt_buckets_any_rg.RData")
dat <- merge(dat, peer.adopt.bucket.any.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.any.rg)

load("data/bucket/peer_adopt_buckets_cs.RData")
dat <- merge(dat, peer.adopt.bucket.cs, by = c("pin", "year"))
rm(peer.adopt.bucket.cs)

load("data/bucket/peer_adopt_buckets_both.RData")
dat <- merge(dat, peer.adopt.bucket.both, by = c("pin", "year"))
rm(peer.adopt.bucket.both)

# Merge peer eligibility buckets -----------------------------------------------
load("data/bucket/peer_elig_buckets_cs.RData")
dat <- merge(dat, peer.buckets.cs, by = c("pin", "year"))
rm(peer.buckets.cs)

load("data/bucket/peer_elig_buckets_rg.RData")
dat <- merge(dat, peer.buckets.rg, by = c("pin", "year"))
rm(peer.buckets.rg)

# Merge total peers ------------------------------------------------------------
load("data/bucket/total_peers.RData")
dat <- merge(dat, total.peers, by = "pin")
rm(total.peers)

# Merge parcel data ------------------------------------------------------------
load("data/parcel_build/parcel_build.RData")
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve,
                                   ren.pre90, ren.90.99, ren.00.09, ren.10,
                                   water.prob)], by = "pin")
rm(parcel.build)

# Merge census data ------------------------------------------------------------
load("data/census/all_parcels_census_2010.RData")
parcels.2010[, blk := as.numeric(paste0(block, blkgrp))]
dat <- merge(dat, parcels.2010[, .(pin, tract, blkgrp, block, blk)])
rm(parcels.2010)

# Scale peer adoption variables ------------------------------------------------
dat[, peer.adopt.any.100.scale     := peer.adopt.any.100 / 100]
dat[, peer.adopt.any.rg.100.scale  := peer.adopt.any.rg.100 / 100]
dat[, peer.adopt.rg.100.scale      := peer.adopt.rg.100 / 100]
dat[, peer.adopt.cs.100.scale      := peer.adopt.cs.100 / 100]
dat[, peer.adopt.both.100.scale    := peer.adopt.both.100 / 100]

# Keep post-2010 only 
dat <- dat[year > 2010]

# Create average eligibility variable ------------------------------------------
dat[, elig.peers.avg.100 := elig.peers.cs.100 + elig.peers.rg.100]

# Create Bootstrap Nonlinear  --------------------------------------------------
boot.samp <- unique(na.omit(dat[elig.cs==1 & post.rainwise!=1,
                                list(pin,year,basin,tract,blkgrp,blk,sub.area,adopt.any,peer.adopt.any.100,
                                     elig.peers.avg.100,peers.100)]))
size <- uniqueN(boot.samp$pin)

B <- 1000
boots <- data.table(
  rep = 1:B,
  probit = rep(0,B),
  probit2 = rep(0,B)
)

for (b in 1:B){
  boot.temp <- boot.samp[unique(boot.samp[, list(pin)])[sample(.N, size, replace = TRUE)], on = "pin"]
  
  boot.temp[,peer.adopt.any.100.resid := feols(peer.adopt.any.100 ~ peers.100 + elig.peers.avg.100 |
                                                 basin + year,
                                               cluster = 'basin',
                                               data=boot.temp)$residuals]
  
  boots[rep==b,probit := feglm(adopt.any ~ peers.100 + peer.adopt.any.100 + peer.adopt.any.100.resid|
                                 basin + year,
                               cluster='basin', lean = T, notes = F, glm.iter = 100,
                               data=boot.temp,
                               family = binomial(link = 'probit'))$coeftable[2,1]
  ]
  
  boots[rep==b,probit2 := feglm(adopt.any ~ peers.100 + peer.adopt.any.100 + 
                                  peer.adopt.any.100.resid + peer.adopt.any.100.resid^2|
                                  basin + year,
                                cluster='basin', lean = T, notes = F, glm.iter = 100,
                                data=boot.temp,
                                family = binomial(link = 'probit'))$coeftable[2,1]
  ]
  
}

#Save
save(boots,file = paste0('data/boostrap/bootstrap_nonlinear.RData'))


B <- 1000
boots <- data.table(
  rep = 1:B,
  probit_c = rep(0,B),
  probit2_c = rep(0,B)
)

set.seed(12345)
for (b in 1:B){
  
  boot.temp <- boot.samp[unique(boot.samp[, list(pin)])[sample(.N, size, replace = TRUE)], on = "pin"]
  
  boot.temp[,peer.adopt.any.100.resid := feols(peer.adopt.any.100 ~ peers.100 + elig.peers.avg.100 |
                                                 basin + year,
                                               cluster = 'basin',
                                               data=boot.temp)$residuals]
  boot.temp <- boot.temp[,peer.adopt.any.100.resid2:=peer.adopt.any.100.resid^2]
  probit.temp <- feglm(adopt.any ~ peers.100 + peer.adopt.any.100+ peer.adopt.any.100.resid |
                         basin + year,
                       cluster='basin', lean = F, notes = F, fixef.rm = 'both', glm.iter = 100,
                       data=boot.temp,
                       family = binomial(link = 'probit'))
  me.c.temp <- slopes(probit.temp,variables = 'peer.adopt.any.100')
  boots[rep==b,probit_c := mean(me.c.temp$estimate)]
  probit2.temp <- feglm(adopt.any ~ peers.100 + peer.adopt.any.100+ peer.adopt.any.100.resid +
                          peer.adopt.any.100.resid2|
                          basin + year,
                        cluster='basin', lean = F, notes = F, fixef.rm = 'both', glm.iter = 100,
                        data=boot.temp,
                        family = binomial(link = 'probit'))
  me.c.temp <- slopes(probit2.temp,variables = 'peer.adopt.any.100')
  boots[rep==b,probit2_c := mean(me.c.temp$estimate)]
  print(b)
}  

#Save
save(boots,file = paste0('data/boostrap/bootstrap_nonlinear_ame.RData'))

# Create Bootstrap Number of Adoptions - 100 Peers --------------------------------------------------

dat[,peer.adopt.any.100.1 := ifelse(peer.adopt.any.100==1,1,0)]
dat[,peer.adopt.any.100.2 := ifelse(peer.adopt.any.100==2,1,0)]
dat[,peer.adopt.any.100.3 := ifelse(peer.adopt.any.100==3,1,0)]
dat[,peer.adopt.any.100.4 := ifelse(peer.adopt.any.100==4,1,0)]
dat[,peer.adopt.any.100.5 := ifelse(peer.adopt.any.100==5,1,0)]
dat[,peer.adopt.any.100.6 := ifelse(peer.adopt.any.100>=6,1,0)]

boot.samp <- unique(na.omit(dat[elig.rg==1 & post.rainwise!=1,
                                list(pin,year,blkgrp,adopt.any,peer.adopt.any.100,
                                     peer.adopt.any.100.1,peer.adopt.any.100.2,
                                     peer.adopt.any.100.3,peer.adopt.any.100.4,
                                     peer.adopt.any.100.5,peer.adopt.any.100.6,
                                     elig.peers.avg.100,peers.100)]))

set.seed(123527)
size <- uniqueN(boot.samp$pin)
B <- 1000
boots <- data.table(
  rep = 1:B,
  a1 = rep(0,B),
  a2 = rep(0,B),
  a3 = rep(0,B),
  a4 = rep(0,B),
  a5 = rep(0,B),
  a6 = rep(0,B)
)

for (b in 1:B){
  boot.temp <- boot.samp[unique(boot.samp[, list(pin)])[sample(.N, size, replace = TRUE)], on = "pin"]
  
  boot.temp[,peer.adopt.any.100.resids := feols(peer.adopt.any.100 ~ peers.100 + elig.peers.avg.100 |
                                                  blkgrp + year,
                                                cluster = 'blkgrp',
                                                data=boot.temp)$residuals]
  
  m.temp <- feols(adopt.any*100 ~ peer.adopt.any.100.1 + peer.adopt.any.100.2 + peer.adopt.any.100.3 + 
                    peer.adopt.any.100.4 + peer.adopt.any.100.5 + peer.adopt.any.100.6 +
                    peer.adopt.any.100.resids + peers.100 | blkgrp + year,
                  cluster = 'blkgrp',
                  data=boot.temp)
  
  for (i in 1:6) {
    boots[rep==b,paste0('a',i) := m.temp$coefficients[i]]
    
  }
  
}

#Save
save(boots,file = paste0('data/bootstrap/bootstrap_num_adopt.RData'))


